This is hopefully the results section for the Study 2 NSE & SE babies watching ASL Stories. We have two main factors:

  1. Language (Sign v. English)
  2. Direction (Forward v. Reversed)

Ages of babies are from 5 to 14.99 months. That means we lose two “old” CODA babies - Janine (16 mo) and za02 (23 mo). The dataset also contains factors for younger v. older babies IF we want to do age group splits. I do not recommend it because we lose all significance that way and we have more precise age information, better than brute groups. IF we do use it, the baby age groups are such:

  • Younger babies = 5 to 8.99 months
  • Older babies = 9.0 to 14.99 months

To skip to the XY Data, go here. To skip to the AOI/FCR analysis, go here.

Demographics

library(tidyverse)
library(janitor)
library(lme4)
library(lmerTest)
library(scales)
library(feather)
library(GGally)
# Grab data that was produced in 03importcleanbabies.Rmd
babies <- read_feather("cleanedbabyeyedata.feather") %>%
  mutate(age = age*12) %>%
  select(participant, language, age, gender, story, direction, mark, trial, repetition, aoi, secs, percent) %>%
  rename(name = participant) %>%
  mutate(agegroup = case_when(
    age <= 8.99 ~ "younger",
    age >= 9.0 & age < 15 ~ "older"
  )) %>%
  filter(!is.na(agegroup)) %>%
  mutate(language = case_when(
    language == "english" ~ "NSE",
    language =="sign" ~ "SE"
  )) %>%
  rename(lang = language) %>%
  mutate(lang2 = case_when(
    name == "aubrey CODA 11m" ~ "HSE",
    name == "GemmaF_11_4_13_CODA" ~ "HSE",
    name == "Hannah_CODA_7m" ~ "HSE",
    name == "ke11es12_7m_MomTerp" ~ "LSE",
    name == "Miles_14m_SE" ~ "HSE",
    name == "Parker 6 m SIGNING" ~ "HSE",
    name == "Penn 6 months SIGN EXPOSED" ~ "HSE",
    name == "Trinity 8m" ~ "LSE",
    name == "Victoria07_18_17" ~ "LSE",
    name == "Wells_8mos" ~ "LSE",
    TRUE ~ lang
  ))
#  filter(name != "Victoria07_18_17")
babiesinfo <- babies %>%
  select(name, lang, age, gender) %>%
  distinct() %>%
  group_by(lang) %>%
  summarise(N = n(),
            age_mean = mean(age),
            sd = sd(age),
            min = min(age),
            max = max(age))
genders <- babies %>%
  select(name, lang, age, gender) %>%
  distinct() %>%
  group_by(lang, gender) %>%
  summarise(N = n()) %>%
  spread(gender, N)
babiesinfo <- left_join(babiesinfo, genders) %>%
  select(lang, N, Female, Male, age_mean, sd, min, max) %>%
  print()
babies$agegroup <- fct_relevel(babies$agegroup, c("younger","older"))
# IF we do age groups, use this code
# 
# babiesinfo <- babies %>%
#   select(name, lang, age, agegroup, gender) %>%
#   distinct() %>%
#   group_by(lang, agegroup) %>%
#   summarise(N = n(),
#             age_mean = mean(age),
#             sd = sd(age),
#             min = min(age),
#             max = max(age))
# 
# genders <- babies %>%
#   select(name, lang, age, agegroup, gender) %>%
#   distinct() %>%
#   group_by(lang, agegroup, gender) %>%
#   summarise(N = n()) %>%
#   spread(gender, N)
# 
# babiesinfo <- left_join(babiesinfo, genders) %>%
#   select(lang, agegroup, N, Female, Male, age_mean, sd, min, max) %>%
#   print()

Global Looking

We calculated percentages based on overall clip length as the denominator. In this way, we can meaningfully contrast looking times at the videos (which are variable lengths) based on different factors. But when we go to AOI analysis we need to re-calculate the percentages so the denominator is based on total looking time, not overall clip length.

The chart below shows little differences based on factors Age, Language, or Direction. That’s good, means the videos were equally engaging for all babies, right?

babies$lang <- as.factor(babies$lang)
babies_overall_looking <- babies %>%
  group_by(name, age, lang, direction, story, repetition) %>%
  summarise(percent = sum(percent)) # gets total looking percent for each trial for each baby
# Table of means
babies_overall_looking %>% 
  group_by(name, lang, direction) %>%
  summarise(percent = mean(percent)) %>% # get average looking percent for each baby
  group_by(lang, direction) %>%
  summarise(mean_percent = mean(percent),
            count = n(),
            sd = sd(percent),
            se = sd/sqrt(count)) %>%
  select(-sd) %>%
  print()
ggplot(babies_overall_looking, aes(x = age, y = percent, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ lang) +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("Video Attention") +
  xlab("age (months)") +
  ylab("percent looking") + 
  theme_bw() + 
  scale_y_continuous(limits = c(0,1), labels = percent) 

# Plot
# babies_overall_looking %>% 
#   group_by(lang, direction, name) %>%
#   summarise(percent = mean(percent)) %>% # gets average looking percent for each baby
#   group_by(lang, direction) %>%
#   summarise(mean_percent = mean(percent), # gets group averages
#             count = n(),
#             sd = sd(percent),
#             se = sd/sqrt(count)) %>% 
#   ggplot(aes(x = lang, y = mean_percent, fill = direction)) + 
#   geom_col(position = "dodge") + 
#   geom_errorbar(aes(ymin = mean_percent - se, ymax = mean_percent + se), 
#                 position = position_dodge(width = 0.9), width = 0.25) + 
#   scale_y_continuous(limits = c(0,1), labels = percent) +
#   theme_minimal() + 
#   theme(panel.grid.major.x = element_blank()) +
# #  facet_wrap("lang") +
#   ggtitle("Video Attention") +
#   xlab("") +
#   ylab("percent looking")
# babies_overall_looking %>%
#   ggplot(aes(x = lang, y = percent, fill = direction)) +
#   facet_wrap("agegroup") + 
#   geom_violin()

A linear model shows there were no significant effects of any factors. We can conclude there was no effect on how much the babies looked at the stimuli.

global_lm <- lmer(percent ~ age + lang * direction + (direction|name) + (direction|story), data = babies_overall_looking)
summary(global_lm) 
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: percent ~ age + lang * direction + (direction | name) + (direction |      story)
   Data: babies_overall_looking

REML criterion at convergence: -109.5

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.17439 -0.71127 -0.01207  0.70541  2.47192 

Random effects:
 Groups   Name              Variance  Std.Dev. Corr 
 name     (Intercept)       0.0103084 0.10153       
          directionreversed 0.0009282 0.03047  1.00 
 story    (Intercept)       0.0086180 0.09283       
          directionreversed 0.0039286 0.06268  -0.76
 Residual                   0.0330635 0.18183       
Number of obs: 349, groups:  name, 26; story, 8

Fixed effects:
                          Estimate Std. Error        df t value Pr(>|t|)    
(Intercept)               0.679103   0.086923 28.572445   7.813 1.43e-08 ***
age                       0.002021   0.008959 23.305300   0.226    0.824    
langSE                   -0.049624   0.050507 23.785326  -0.983    0.336    
directionreversed        -0.033434   0.034821 12.347582  -0.960    0.355    
langSE:directionreversed  0.019263   0.041732 94.210799   0.462    0.645    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) age    langSE drctnr
age         -0.854                     
langSE      -0.051 -0.197              
dirctnrvrsd -0.228  0.006  0.068       
lngSE:drctn  0.034 -0.001 -0.126 -0.497
#ggcoef(global_lm)

AOI Looking

Now we’ll re-calculate the percentages so the denominator is based on total looking time. All AOIs should sum up to 100% for each trial and each baby. Next let’s make a boxplot of all AOIs.

#Recalculate percent
babies <- babies %>%
  select(-percent) %>%
  group_by(name, lang, agegroup, age, direction, story, mark, trial, repetition, gender) %>%
  mutate(totalsec = sum(secs)) %>%
  group_by(name, lang, agegroup, age, direction, story, mark, trial, repetition, gender, aoi) %>%
  mutate(percent = secs/totalsec)
# Boxplot
babies %>%
  ggplot(aes(x = aoi, y = percent, fill = direction)) + 
  geom_boxplot() +
  ggtitle("AOI Attention") +
  theme_bw() + 
  xlab("") +
  theme(axis.text.x = element_text(angle=45, hjust = 1),
        panel.grid.major.x = element_blank()) +
  scale_y_continuous(labels = scales::percent, limits = c(0,1))

It appears two important AOIs are MidChestTop and MidFaceBottom. Let’s look again only at midline AOIs:

midline = c("Belly","BelowChest","MidChestBottom","MidChestCenter","MidChestTop",
            "MidFaceBottom","MidFaceCenter","MidFaceTop")
babies %>%
  filter(aoi %in% midline) %>%
  ggplot(aes(x = aoi, y = percent, fill = direction)) + 
  geom_boxplot() +
  ggtitle("Midline AOI Attention") +
  theme_bw() + 
  xlab("") +
  theme(axis.text.x = element_text(angle=45, hjust = 1),
        panel.grid.major.x = element_blank()) +
  scale_y_continuous(labels = scales::percent, limits = c(0,1))

I’m going to run linear models with only MidChestTop or MidFaceBottom, and see what happens. No age interactions.

MidChestTop:

  • Significant effect of age (p = 0.004) - older babies look at MidChestTop AOI more.
  • Significant effect of language (p = 0.006) - NSE babies look +14.6% more than SE babies.
  • Significant effect of direction (p = 0.046) - Babies look about 4.5% less for reversed
  • No language X direction interaction.

MidFaceBottom:

  • No effect of age
  • Significant effect of language (p = 0.004) - SE babies look +21% more than NSE babies.
  • Marginal effect of language (p = 0.073) - Babies look about 2% more for reversed.
  • No language X direction interaction.
babies %>%
  filter(aoi %in% c("MidFaceBottom","MidChestTop")) %>%
  ggplot(aes(x = age, y = percent, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) + 
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_continuous(limits = c(0,1), labels = percent) +
  theme_bw() + 
#  theme(panel.grid.major.x = element_blank()) +
  facet_grid(aoi ~ lang) +
  ggtitle("AOI Attention") +
  xlab("") +
  ylab("percent looking")

midchesttop_lm <- lmer(percent ~ age + lang * direction + (1|name) + (1|story), data = filter(babies, aoi == "MidChestTop"))
summary(midchesttop_lm)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: percent ~ age + lang * direction + (1 | name) + (1 | story)
   Data: filter(babies, aoi == "MidChestTop")

REML criterion at convergence: -215.2

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.0246 -0.7130 -0.1414  0.6022  2.6752 

Random effects:
 Groups   Name        Variance  Std.Dev.
 name     (Intercept) 0.0114441 0.1070  
 story    (Intercept) 0.0002891 0.0170  
 Residual             0.0253365 0.1592  
Number of obs: 349, groups:  name, 26; story, 8

Fixed effects:
                           Estimate Std. Error         df t value Pr(>|t|)   
(Intercept)                0.110030   0.076921  23.643715   1.430  0.16568   
age                        0.029452   0.008477  22.192885   3.474  0.00213 **
langSE                    -0.134837   0.050436  27.916006  -2.673  0.01240 * 
directionreversed         -0.043631   0.022400 320.218547  -1.948  0.05232 . 
langSE:directionreversed   0.038910   0.034773 317.931790   1.119  0.26400   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) age    langSE drctnr
age         -0.912                     
langSE      -0.076 -0.188              
dirctnrvrsd -0.137 -0.002  0.213       
lngSE:drctn  0.084  0.006 -0.336 -0.645
#ggcoef(midchesttop_lm)
midfacebottom_lm <- lmer(percent ~ age + lang * direction + (1|name) + (1|story), data = filter(babies, aoi == "MidFaceBottom"))
summary(midfacebottom_lm)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: percent ~ age + lang * direction + (1 | name) + (1 | story)
   Data: filter(babies, aoi == "MidFaceBottom")

REML criterion at convergence: -157.6

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.42325 -0.77817 -0.02519  0.70580  2.50539 

Random effects:
 Groups   Name        Variance Std.Dev.
 name     (Intercept) 0.019877 0.14099 
 story    (Intercept) 0.002498 0.04998 
 Residual             0.028516 0.16887 
Number of obs: 349, groups:  name, 26; story, 8

Fixed effects:
                           Estimate Std. Error         df t value Pr(>|t|)   
(Intercept)                0.334139   0.099651  25.233307   3.353  0.00253 **
age                       -0.008939   0.010886  22.948912  -0.821  0.42004   
langSE                     0.193693   0.063648  26.900910   3.043  0.00518 **
directionreversed          0.038727   0.023881 317.208664   1.622  0.10587   
langSE:directionreversed  -0.052616   0.036996 315.990174  -1.422  0.15595   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) age    langSE drctnr
age         -0.903                     
langSE      -0.065 -0.193              
dirctnrvrsd -0.112 -0.002  0.179       
lngSE:drctn  0.069  0.006 -0.283 -0.647
#ggcoef(midfacebottom_lm)
# Bar chart
# babies %>%
#   filter(aoi %in% c("MidFaceBottom","MidChestTop")) %>%
#   group_by(agegroup, lang, direction, name, aoi) %>%
#   summarise(percent = mean(percent)) %>% # gets average looking percent for each baby
#   group_by(agegroup, lang, direction, aoi) %>%
#   summarise(mean_percent = mean(percent), # gets group averages
#             count = n(),
#             sd = sd(percent),
#             se = sd/sqrt(count)) %>% 
#   ggplot(aes(x = lang, y = mean_percent, fill = direction)) + 
#   geom_col(position = "dodge") + 
#   geom_errorbar(aes(ymin = mean_percent - se, ymax = mean_percent + se), 
#                 position = position_dodge(width = 0.9), width = 0.25) + 
#   scale_y_continuous(limits = c(0,1), labels = percent) +
#   theme_minimal() + 
#   theme(panel.grid.major.x = element_blank()) +
#   facet_grid(aoi ~ agegroup) +
#   ggtitle("Video Attention") +
#   xlab("") +
#   ylab("percent looking")

Face-Chest Ratio

Next, we’ll define a Face-Chest Ratio (FCR) such that:

  1. MidFaceCenter, MidFaceBottom = Face
  2. MidChestTop, MidChestCenter, MidChestBottom, BelowChest = Chest
  3. FCR = face - chest / face + chest

We did not include Belly or MidFaceTop because of very low looking rates according to the boxplots above.

babies_fcr <- babies %>%
  spread(aoi,percent) %>%
  group_by(name, age, agegroup, lang, gender, direction, story, repetition) %>%
  summarise(face = sum(MidFaceCenter, MidFaceBottom, na.rm = TRUE),
         chest = sum(MidChestTop, MidChestCenter, MidChestBottom, BelowChest, na.rm = TRUE),
         fcr = (face - chest) / (face + chest))
# Table of means
babies_fcr %>% 
  group_by(lang, direction, name) %>%
  summarise(fcr = mean(fcr)) %>% # gets average looking percent for each baby
  group_by(lang, direction) %>%
  summarise(mean_fcr = mean(fcr), # gets group averages
            count = n(),
            sd = sd(fcr),
            se = sd/sqrt(count)) %>%
  select(-sd) %>%
  print()
# Plot
ggplot(babies_fcr, aes(x = age, y = fcr, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) + 
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_continuous(limits = c(-1,1)) +
  theme_bw() + 
  theme(panel.grid.major.x = element_blank()) +
  facet_grid(. ~ lang) +
  ggtitle("Face-Chest Ratios") +
  xlab("") +
  ylab("FCR")

# Bar chart
# babies_fcr %>% 
#   group_by(agegroup, lang, direction, name) %>%
#   summarise(fcr = mean(fcr)) %>% # gets average looking percent for each baby
#   group_by(agegroup, lang, direction) %>%
#   summarise(mean_fcr = mean(fcr), # gets group averages
#             count = n(),
#             sd = sd(fcr),
#             se = sd/sqrt(count)) %>% 
#   ggplot(aes(x = lang, y = mean_fcr, fill = direction)) + 
#   geom_col(position = "dodge") + 
#   geom_errorbar(aes(ymin = mean_fcr - se, ymax = mean_fcr + se), 
#                 position = position_dodge(width = 0.9), width = 0.25) + 
#   scale_y_continuous(limits = c(-1,1)) +
#   theme_minimal() + 
#   theme(panel.grid.major.x = element_blank()) +
#   facet_wrap("agegroup") +
#   ggtitle("Face-Chest Ratios") +
#   xlab("") +
#   ylab("FCR")

What will a linear mixed model tell us? (with no age interactions)

  • No effect of age. Interesting. Maybe because we don’t have that many babies.
  • Strong effect of language: SE babies have overall +0.50 FCR than NSE babies (p = 0.008); in other words, they are more attracted to the face.
  • Effect of direction: Reversal increases FCR by +0.12 across all babies (p = 0.032). Interesting.
  • Weak language x direction interaction. It seems NSE babies have a bigger reversal effect than SE babies? Trying to judge from the heat maps. (p = 0.095). BUt it’s so weak so possibly not worth mentioning.
fcr_lm <- lmer(fcr ~ age + lang * direction + (1|name) + (1|story), data = babies_fcr)
summary(fcr_lm)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: fcr ~ age + lang * direction + (1 | name) + (1 | story)
   Data: babies_fcr

REML criterion at convergence: 435.5

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.2135 -0.7622  0.1098  0.6343  2.3489 

Random effects:
 Groups   Name        Variance Std.Dev.
 name     (Intercept) 0.14776  0.3844  
 story    (Intercept) 0.01342  0.1158  
 Residual             0.15708  0.3963  
Number of obs: 349, groups:  name, 26; story, 8

Fixed effects:
                          Estimate Std. Error        df t value Pr(>|t|)  
(Intercept)                0.07490    0.26654  24.59417   0.281   0.7811  
age                       -0.04443    0.02930  22.87598  -1.516   0.1431  
langSE                     0.45427    0.16972  25.84156   2.677   0.0127 *
directionreversed          0.11887    0.05605 316.96084   2.121   0.0347 *
langSE:directionreversed  -0.14367    0.08683 315.78391  -1.655   0.0990 .
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) age    langSE drctnr
age         -0.908                     
langSE      -0.060 -0.196              
dirctnrvrsd -0.098 -0.002  0.158       
lngSE:drctn  0.060  0.005 -0.249 -0.647
#ggcoef(fcr_lm)

Heat Maps

And now heat maps!

heatmap_babies <- babies %>%
  filter(aoi %in% midline) %>%
  ungroup() %>%
  group_by(lang, name, direction, aoi) %>%
  summarise(percent = mean(percent, na.rm=TRUE)) %>%
  group_by(lang, direction, aoi) %>%
  summarise(percent = mean(percent, na.rm=TRUE)) %>%
  ungroup() %>%
  mutate(aoi = factor(aoi, levels = c("Belly","BelowChest","MidChestBottom","MidChestCenter","MidChestTop",
            "MidFaceBottom","MidFaceCenter","MidFaceTop")))
ggplot(heatmap_babies, aes(x = lang, y = aoi)) +
  geom_tile(aes(fill=percent),color="lightgray",na.rm=TRUE) + 
#  scale_fill_viridis(option = "viridis", direction=-1, limits = c(0,.7), labels = percent, name = "looking time") +
    scale_fill_gradient(low = "#ffffff", high = "#08519c", space = "Lab", limits = c(0,.451), labels = percent, name = "looking time", na.value = "grey50") +
  theme_bw() +
  theme(strip.text.x = element_text(size = 11, color = "black", face = "italic"), 
        strip.background = element_rect(colour = "white", fill = "white"),
        panel.grid.major = element_line(color = "white")) +
  facet_grid(. ~ direction) +
  ylab("") + xlab("") + ggtitle("Eye Gaze Heat Map, by Direction") + 
  scale_y_discrete(expand=c(0,0)) +
  scale_x_discrete(expand = c(0,0))

ggplot(heatmap_babies, aes(x = direction, y = aoi)) +
  geom_tile(aes(fill=percent),color="lightgray",na.rm=TRUE) + 
#  scale_fill_viridis(option = "viridis", direction=-1, limits = c(0,.7), labels = percent, name = "looking time") +
    scale_fill_gradient(low = "#ffffff", high = "#08519c", space = "Lab", limits = c(0,.451), labels = percent, name = "looking time", na.value = "grey50") +
  theme_bw() +
  theme(strip.text.x = element_text(size = 11, color = "black", face = "italic"), 
        strip.background = element_rect(colour = "white", fill = "white"),
        panel.grid.major = element_line(color = "white")) +
  facet_grid(. ~ lang) +
  ylab("") + xlab("") + ggtitle("Eye Gaze Heat Map, by Language") + 
  scale_y_discrete(expand=c(0,0)) +
  scale_x_discrete(expand = c(0,0))

Different Ways to Visualize Reversal

I want to try to visualize reversal effects a different way. Maybe this.

# Get participant-level data
babies_fcr2 <- babies_fcr %>%
  group_by(name, age, lang, direction) %>%
  summarise(fcr = mean(fcr))
# reversal_effect_lm <- lmer(fcr ~ age + lang * direction + (1|name), data = babies_fcr2)
# summary(reversal_effect_lm)
ggplot(babies_fcr2, aes(x = direction, y = fcr, color = lang, fill = lang)) +
  geom_point() +
  geom_line(aes(group = name)) +
  facet_grid(. ~ lang) + 
  scale_y_continuous(limits = c(-1,1)) +
  theme_bw()

Or a reversal effect chart? Okay, so this chart tells us overall there really wasn’t much of a reversal effect for SE babies, they’re all hovering around 0. Interesting. While there seems to be a reversal effect for NSE babies where they look at the face more during reversed stories!

# Get participant-level data
babies_fcr3 <- babies_fcr2 %>%
  spread(direction, fcr) %>%
  group_by(name, age, lang) %>%
  summarise(diff = forward - reversed)
ggplot(babies_fcr3, aes(x = age, y = diff, color = lang)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_continuous(limits = c(-1,1)) +
  theme_bw() +
  ggtitle("Reversal Effect") +
  ylab("Forward FCR - Reversed FCR")

This analysis of within-subject variation here:

# First get the mean of each trial, THEN the participant-level means
within_subjects <- babies_fcr %>%
  group_by(name, lang, direction, story, repetition) %>%
  summarise(fcr = mean(fcr, na.rm = TRUE),
            count = n()) %>%
  group_by(name, lang, direction) %>%
  summarise(mean = mean(fcr, na.rm = TRUE),
            se = sd(fcr, na.rm = TRUE)/sqrt(n()),
            count = n())
# Then spread out mean and SE columns by direction
within_subjects_means <- within_subjects %>%
  select(-se, -count) %>%
  spread(direction, mean, sep = "_")
within_subjects_se <- within_subjects %>%
  select(-mean, -count) %>%
  spread(direction, se, sep = "SE")
within_subjects <- left_join(within_subjects_means, within_subjects_se, by = c("name","lang"))
# Now let's plot
lims <- c(-1,1)
within_subjects %>%
  ggplot(aes(x = direction_forward, y = direction_reversed, color = lang)) +
  geom_abline() +
  geom_point(size = 2) + 
  geom_errorbar(aes(ymin=direction_reversed-directionSEreversed, ymax=direction_reversed+directionSEreversed)) +
  geom_errorbarh(aes(xmin=direction_forward-directionSEforward, xmax=direction_forward+directionSEforward)) +
  theme_bw() +
  theme(aspect.ratio = 1) +
  scale_x_continuous("forward", limits = c(-1,1)) +
  scale_y_continuous("reversed", limits = c(-1,1)) +
  ggtitle("FCR Means") +
  facet_wrap("lang")

And a classic box/error plot with age collapsed.

babies_fcr2 %>%
  group_by(lang, direction) %>%
  summarise(fcr_mean = mean(fcr),
            sd = sd(fcr),
            n = n(),
            se = sd/sqrt(n)) %>%
  ggplot(aes(x = lang, y = fcr_mean, fill = direction)) +
  geom_bar(stat = "identity", position = position_dodge()) + 
  geom_errorbar(aes(ymin = fcr_mean-se, ymax = fcr_mean+se), position = position_dodge(0.9), width = 0.2) +
  scale_y_continuous(limits = c(-0.5, 0.5)) +
  theme_linedraw()

Discussion re AOI/anatomical data

The biggest change is we lost the interaction between language and direction. For the ICSLA abstract we reported a strong interaction (p = 0.01), but now it’s p = 0.10. Shoot.

The interpretation here is that:

  • All babies looked equally at all videos regardless of language or direction. Good!
  • SE babies are stronger face-lookers than NSE babies. (Same as ICSLA)
  • Reversal appears to affect NSE babies more than SE babies? (Marginal). This is unlike ICSLA.

XY Space Data

We’ll load the data from the childxydata.feather file made in 06rawxydata.Rmd. So any new babies, please run the first code block in 06 to include it. Then we’ll keep all the babies we also have in the AOI data group.

included <- babies %>%
  ungroup() %>%
  select(name) %>% 
  distinct() %>%
  unlist()
xydata <- read_feather("../Child Data/childxydata.feather") %>%
  rename(name = participant) %>%
  filter(name %in% included)
# Get ages
ages <- read_csv("childrenages.csv") %>%
  rename(name = participant)
xydata <- xydata %>% left_join(ages, by = "name") %>%
  mutate(age = age*12) %>%
  mutate(agegroup = case_when(
    age <= 8.99 ~ "younger",
    age >= 9.0 & age < 15 ~ "older"
  )) %>%
  mutate(language = case_when(
    language == "EnglishExposed" ~ "NSE",
    language == "SignLanguageExposed" ~ "SE"
  )) %>%
  rename(lang = language) %>%
  select(name, group, gender, lang, condition, mark, trial, repetition, x, y, age, agegroup) %>%
  separate(condition, into = c("story", "clip", "direction")) %>%
  unite("story", c("story", "clip")) %>%
  mutate(direction = case_when(
    direction == "ER" ~ "reversed",
    direction == "FW" ~ "forward"
  )) %>%
  mutate(name = factor(name),
         group = factor(group),
         gender = factor(gender),
         lang = factor(lang),
         story = factor(story),
         direction = factor(direction),
         mark = factor(mark),
         trial = factor(trial),
         repetition = factor(repetition),
         agegroup = factor(agegroup))

Overall Looking

Let’s check that we have no significant group or condition differences in terms of valid (not empty) data points collected. This is same as “Global Looking” we have above, really, but w raw xy data.

xy_overall <- xydata %>%
  filter(!is.na(x)) %>%
  group_by(name, age, lang, direction, story, repetition) %>%
  summarise(data_points = n()) # gets total looking percent for each trial for each baby
# Table of means
xy_overall %>% 
  group_by(name, lang, direction) %>%
  summarise(data_points = mean(data_points)) %>% # get average looking percent for each baby
  group_by(lang, direction) %>%
  summarise(mean_data_points = mean(data_points),
            count = n(),
            sd = sd(data_points),
            se = sd/sqrt(count)) %>%
  select(-sd) %>%
  print()
ggplot(xy_overall, aes(x = age, y = data_points, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ lang) +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("Data Points") +
  xlab("age (months)") +
  ylab("data points recorded") + 
  theme_bw() 

A linear model shows there were no significant effects of any factors. We can conclude there was no effect on how much data was collected from the babies.

overall_xy_lm <- lmer(data_points ~ age + lang * direction + (direction|name) + (direction|story), data = xy_overall)
summary(overall_xy_lm) 
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: data_points ~ age + lang * direction + (direction | name) + (direction |      story)
   Data: xy_overall

REML criterion at convergence: 5528.3

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.7588 -0.6663 -0.0078  0.6278  2.5542 

Random effects:
 Groups   Name              Variance Std.Dev. Corr 
 name     (Intercept)       20748.3  144.04        
          directionreversed   827.6   28.77   1.00 
 story    (Intercept)       22549.1  150.16        
          directionreversed  4215.1   64.92   -0.05
 Residual                   52385.3  228.88        
Number of obs: 401, groups:  name, 26; story, 8

Fixed effects:
                         Estimate Std. Error      df t value Pr(>|t|)    
(Intercept)               524.548    120.054  29.519   4.369 0.000141 ***
age                         8.531     12.060  23.322   0.707 0.486339    
langSE                      8.304     68.321  23.886   0.122 0.904275    
directionreversed         -20.437     38.068  11.693  -0.537 0.601428    
langSE:directionreversed  -16.638     48.592 131.756  -0.342 0.732595    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) age    langSE drctnr
age         -0.828                     
langSE      -0.040 -0.204              
dirctnrvrsd -0.045 -0.003  0.063       
lngSE:drctn  0.026  0.002 -0.130 -0.499
#ggcoef(overall_xy_lm)

XY Data LMMs

Now we’re going to run LMMs on babies’ raw:

  • horizontal spread (middle 50% of x data; xIQR)
  • vertical spread (middle 50% of y data; yIQR)
  • viewing area (A = middle-x * middle-y; area)

But to do this we first trim each baby’s data, getting rid of the first 60 samples (0.5 secs) of each trial.

xydata <- xydata %>%
  group_by(name,trial) %>%
  slice(60:n())
iqr <- xydata %>%
  group_by(name, age, lang, story, direction, trial) %>%
  summarise(xIQR = IQR(x,na.rm=TRUE),
                   yIQR = IQR(y,na.rm=TRUE),
                   xmed = median(x, na.rm=TRUE),
                   ymed = median(y, na.rm=TRUE),
                   area = xIQR*yIQR)
head(iqr,20)

Middle X

Results tell us that there is a significant effect of age where horizontal spread grew narrower with older babies (p = 0.024; slope = -2.2 px/month). There was a marginal effect of language where SE babies have slightly narrower horizontal spread (11 px; p = 0.088). No effects of reversal or interactions.

xiqr_mean <- iqr %>% 
  group_by(lang, direction, name) %>%
  summarise(xIQR = mean(xIQR, na.rm = T)) %>% # gets average looking percent for each baby
  group_by(lang, direction) %>%
  summarise(mean_xIQR = mean(xIQR), # gets group averages
            count = n(),
            sd = sd(xIQR),
            se = sd/sqrt(count)) %>%
  select(-sd) %>%
  print()
# Plot
ggplot(iqr, aes(x = age, y = xIQR, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) + 
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw() + 
  theme(panel.grid.major.x = element_blank()) +
  facet_grid(. ~ lang) +
  ggtitle("Horizontal Spread") +
  xlab("") +
  ylab("xIQR")

ggplot(xiqr_mean, aes(x = lang, y = mean_xIQR, fill = direction)) +
  geom_bar(stat = "identity", position = position_dodge()) + 
  geom_errorbar(aes(ymin = mean_xIQR-se, ymax = mean_xIQR+se), position = position_dodge(0.9), width = 0.2) +
  theme_linedraw()

xiqr_lm <- lmer(xIQR ~ age + lang * direction + (1|name) + (1|story), data = iqr)
summary(xiqr_lm)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: xIQR ~ age + lang * direction + (1 | name) + (1 | story)
   Data: iqr

REML criterion at convergence: 3839.2

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.1407 -0.4782 -0.1683  0.2668  9.8629 

Random effects:
 Groups   Name        Variance Std.Dev.
 name     (Intercept) 104.0925 10.2026 
 story    (Intercept)   0.4767  0.6905 
 Residual             901.8472 30.0308 
Number of obs: 398, groups:  name, 26; story, 8

Fixed effects:
                         Estimate Std. Error       df t value Pr(>|t|)    
(Intercept)               63.5887     8.5596  25.6516   7.429  7.5e-08 ***
age                       -2.2423     0.9342  23.2148  -2.400   0.0248 *  
langSE                   -12.0971     6.0828  41.5383  -1.989   0.0533 .  
directionreversed          1.9022     3.8627 372.0710   0.492   0.6227    
langSE:directionreversed   0.1790     6.1817 369.3456   0.029   0.9769    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) age    langSE drctnr
age         -0.901                     
langSE      -0.106 -0.175              
dirctnrvrsd -0.213 -0.006  0.308       
lngSE:drctn  0.135  0.002 -0.504 -0.625
#ggcoef(xiqr_lm)

Middle Y

We had a significant effect of age where vertical spread got narrower with older babies (p = 0.019, slope = -5.5 px/month). There were no effect of language, direction, or interactions.

yiqr_mean <- iqr %>% 
  group_by(lang, direction, name) %>%
  summarise(yIQR = mean(yIQR, na.rm = T)) %>% # gets average looking percent for each baby
  group_by(lang, direction) %>%
  summarise(mean_yIQR = mean(yIQR), # gets group averages
            count = n(),
            sd = sd(yIQR),
            se = sd/sqrt(count)) %>%
  select(-sd) %>%
  print()
# Plot
ggplot(iqr, aes(x = age, y = yIQR, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) + 
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw() + 
  theme(panel.grid.major.x = element_blank()) +
  facet_grid(. ~ lang) +
  ggtitle("Vertical Spread") +
  xlab("") +
  ylab("yIQR")

ggplot(yiqr_mean, aes(x = lang, y = mean_yIQR, fill = direction)) +
  geom_bar(stat = "identity", position = position_dodge()) + 
  geom_errorbar(aes(ymin = mean_yIQR-se, ymax = mean_yIQR+se), position = position_dodge(0.9), width = 0.2) +
  theme_linedraw()

yiqr_lm <- lmer(yIQR ~ age + lang * direction + (1|name) + (1|story), data = iqr)
summary(yiqr_lm)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: yIQR ~ age + lang * direction + (1 | name) + (1 | story)
   Data: iqr

REML criterion at convergence: 4251.3

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.7516 -0.5310 -0.2210  0.3696  5.8948 

Random effects:
 Groups   Name        Variance  Std.Dev. 
 name     (Intercept) 6.890e+02 2.625e+01
 story    (Intercept) 7.045e-12 2.654e-06
 Residual             2.480e+03 4.979e+01
Number of obs: 398, groups:  name, 26; story, 8

Fixed effects:
                         Estimate Std. Error      df t value Pr(>|t|)    
(Intercept)               119.653     19.332  24.309   6.189 2.03e-06 ***
age                        -5.506      2.136  23.024  -2.578   0.0168 *  
langSE                    -16.804     13.040  32.003  -1.289   0.2068    
directionreversed           8.778      6.407 371.087   1.370   0.1715    
langSE:directionreversed   -5.713     10.252 370.630  -0.557   0.5777    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) age    langSE drctnr
age         -0.912                     
langSE      -0.079 -0.187              
dirctnrvrsd -0.156 -0.005  0.238       
lngSE:drctn  0.099  0.001 -0.390 -0.625
#ggcoef(yiqr_lm)

Viewing Area

Again, age is significant (p = 0.01) such that area gets smaller with age (about 500 sq. px. per month). No effect of language or direction or interactions.

area_mean <- iqr %>% 
  group_by(lang, direction, name) %>%
  summarise(area = mean(area, na.rm = T)) %>% # gets average looking percent for each baby
  group_by(lang, direction) %>%
  summarise(area_mean = mean(area), # gets group averages
            count = n(),
            sd = sd(area),
            se = sd/sqrt(count)) %>%
  select(-sd) %>%
  print()
# Plot
ggplot(iqr, aes(x = age, y = area, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) + 
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw() + 
  theme(panel.grid.major.x = element_blank()) +
  facet_grid(. ~ lang) +
  ggtitle("Viewing Area") +
  xlab("") +
  ylab("Area (px^2)")

ggplot(area_mean, aes(x = lang, y = area_mean, fill = direction)) +
  geom_bar(stat = "identity", position = position_dodge()) + 
  geom_errorbar(aes(ymin = area_mean-se, ymax = area_mean+se), position = position_dodge(0.9), width = 0.2) +
  theme_linedraw()

area_lm <- lmer(area ~ age + lang * direction + (1|name) + (1|story), data = iqr)
summary(area_lm)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: area ~ age + lang * direction + (1 | name) + (1 | story)
   Data: iqr

REML criterion at convergence: 8154.5

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.6788 -0.2727 -0.1278  0.0299 13.4544 

Random effects:
 Groups   Name        Variance  Std.Dev. 
 name     (Intercept) 6.231e+06 2.496e+03
 story    (Intercept) 4.677e-10 2.163e-05
 Residual             5.292e+07 7.275e+03
Number of obs: 398, groups:  name, 26; story, 8

Fixed effects:
                         Estimate Std. Error       df t value Pr(>|t|)    
(Intercept)               9372.80    2085.26    25.71   4.495  0.00013 ***
age                       -589.26     227.76    23.24  -2.587  0.01640 *  
langSE                   -2048.69    1480.52    41.32  -1.384  0.17386    
directionreversed          712.93     935.49   372.10   0.762  0.44648    
langSE:directionreversed  -303.67    1497.33   371.32  -0.203  0.83940    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) age    langSE drctnr
age         -0.902                     
langSE      -0.105 -0.176              
dirctnrvrsd -0.211 -0.006  0.307       
lngSE:drctn  0.134  0.002 -0.502 -0.625
#ggcoef(area_lm)

Plotting Viewing Area

medians <- iqr %>%
  group_by(name,lang,direction) %>%
  summarise(xIQR = median(xIQR,na.rm=TRUE),
                   yIQR = median(yIQR,na.rm=TRUE),
                   xmed = median(xmed,na.rm=TRUE),
                   ymed = median(ymed,na.rm=TRUE)) %>%
  group_by(lang,direction) %>% 
  summarise(xIQR = mean(xIQR,na.rm=TRUE),
                   yIQR = mean(yIQR,na.rm=TRUE),
                   x = median(xmed,na.rm=TRUE),
                   y = median(ymed,na.rm=TRUE)) %>%
  mutate(y = y*-1,
         xmin = x-(xIQR/2),
         xmax = x+(xIQR/2),
         ymin = y-(yIQR/2),
         ymax = y+(yIQR/2))
img <- png::readPNG("cindy.png")
g <- grid::rasterGrob(img, interpolate=TRUE, width=unit(1,"npc"), height=unit(1,"npc")) 
ggplot(medians, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_linedraw() +
  scale_x_continuous(limits = c(0,1080), expand = c(0, 0)) +
  scale_y_continuous(limits = c(-720,0), expand = c(0, 0)) +
  facet_wrap("lang")

Puppies

Get puppy data!

# puppies1 <- read_tsv("../Child Data/_puppies/all puppies group 1 sums.txt", na = "-") %>% 
#   clean_names() %>%
#   select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
#   rename(name = x1) %>%
#   mutate(mean = rowMeans(.[2:10], na.rm = T)) %>%
#   select(name, mean)
#   
# puppies2 <- read_tsv("../Child Data/_puppies/all puppies group 2 sums.txt", na = "-") %>% 
#   clean_names() %>%
#   select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
#   rename(name = x1) %>%
#   mutate(mean = rowMeans(.[2:17], na.rm = T)) %>%
#   select(name, mean)
# 
# puppies <- rbind(puppies1, puppies2)
# 
# participants <- xydata %>% 
#   select(name, group, gender, lang) %>% 
#   distinct()
# 
# puppies <- left_join(participants, puppies, by = "name")
# 
# summary(lm(data = puppies, mean ~ lang))
# 
# puppies_se <- filter(puppies, lang == "NSE")
# puppies_nse <- filter(puppies, lang == "SE")
# 
# t.test(puppies_se$mean, puppies_nse$mean)
# Let's do this again but preserve puppy-level data
puppies1 <- read_tsv("../Child Data/_puppies/all puppies group 1 sums.txt", na = "-") %>% 
  clean_names() %>%
  select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
  rename(name = x1) %>%
  gather(key = puppy, value = sec, -name) %>%
  mutate(pups = case_when(
    str_detect(puppy, "huskies") ~ "huskies",
    str_detect(puppy, "golden") ~ "golden",
    str_detect(puppy, "wawa") ~ "wawa",
    str_detect(puppy, "frisby") ~ "frisby",
    str_detect(puppy, "bulldog") ~ "bulldog",
    str_detect(puppy, "puppy_jpg") ~ "puppy",
    TRUE ~ puppy
  ))
puppies2 <- read_tsv("../Child Data/_puppies/all puppies group 2 sums.txt", na = "-") %>% 
  clean_names() %>%
  select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
  rename(name = x1) %>%
  gather(key = puppy, value = sec, -name) %>%
  mutate(pups = case_when(
    str_detect(puppy, "huskies") ~ "huskies",
    str_detect(puppy, "golden") ~ "golden",
    str_detect(puppy, "wawa") ~ "wawa",
    str_detect(puppy, "frisby") ~ "frisby",
    str_detect(puppy, "bulldog") ~ "bulldog",
    str_detect(puppy, "puppies") ~ "puppies",
    TRUE ~ puppy
  ))
puppies <- rbind(puppies1,puppies2)
participants <- xydata %>%
  select(name, group, gender, lang) %>%
  distinct()
puppies <- left_join(participants, puppies, by = "name")
summary(lmer(data = puppies, sec ~ lang + (1|name) + (1|pups)))
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: sec ~ lang + (1 | name) + (1 | pups)
   Data: puppies

REML criterion at convergence: 15818.6

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.9738 -0.5348 -0.0747  0.3391  8.2862 

Random effects:
 Groups   Name        Variance Std.Dev.
 name     (Intercept) 0.8587   0.9267  
 pups     (Intercept) 0.1115   0.3339  
 Residual             1.8390   1.3561  
Number of obs: 4550, groups:  name, 26; pups, 7

Fixed effects:
            Estimate Std. Error      df t value Pr(>|t|)    
(Intercept)   2.1716     0.2655 29.2732   8.178 4.76e-09 ***
langSE       -0.5248     0.3759 23.9367  -1.396    0.176    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
       (Intr)
langSE -0.545
ggplot(puppies, aes(x = lang, y = sec, fill = lang)) + geom_boxplot()

---
title: "Babies - Study 2 - Results"
output: 
  html_notebook:
    code_folding: hide
    theme: spacelab
    highlight: tango
    toc: yes
    toc_depth: 2
    toc_float: yes
    df_print: paged
---

This is hopefully the results section for the Study 2 NSE & SE babies watching ASL Stories. We have two main factors: 

1. Language (Sign v. English)
1. Direction (Forward v. Reversed)

Ages of babies are from 5 to 14.99 months. That means we lose two "old" CODA babies - Janine (16 mo) and za02 (23 mo). The dataset also contains factors for younger v. older babies *IF* we want to do age group splits. I do not recommend it because we lose all significance that way and we have more precise age information, better than brute groups.  IF we do use it, the baby age groups are such:

* Younger babies = 5 to 8.99 months
* Older babies = 9.0 to 14.99 months

To skip to the XY Data, go here. 
To skip to the AOI/FCR analysis, go here. 

# Demographics
```{r message=FALSE, warning=FALSE}
library(tidyverse)
library(janitor)
library(lme4)
library(lmerTest)
library(scales)
library(feather)
library(GGally)

# Grab data that was produced in 03importcleanbabies.Rmd
babies <- read_feather("cleanedbabyeyedata.feather") %>%
  mutate(age = age*12) %>%
  select(participant, language, age, gender, story, direction, mark, trial, repetition, aoi, secs, percent) %>%
  rename(name = participant) %>%
  mutate(agegroup = case_when(
    age <= 8.99 ~ "younger",
    age >= 9.0 & age < 15 ~ "older"
  )) %>%
  filter(!is.na(agegroup)) %>%
  mutate(language = case_when(
    language == "english" ~ "NSE",
    language =="sign" ~ "SE"
  )) %>%
  rename(lang = language) %>%
  mutate(lang2 = case_when(
    name == "aubrey CODA 11m" ~ "HSE",
    name == "GemmaF_11_4_13_CODA" ~ "HSE",
    name == "Hannah_CODA_7m" ~ "HSE",
    name == "ke11es12_7m_MomTerp" ~ "LSE",
    name == "Miles_14m_SE" ~ "HSE",
    name == "Parker 6 m SIGNING" ~ "HSE",
    name == "Penn 6 months SIGN EXPOSED" ~ "HSE",
    name == "Trinity 8m" ~ "LSE",
    name == "Victoria07_18_17" ~ "LSE",
    name == "Wells_8mos" ~ "LSE",
    TRUE ~ lang
  ))
#  filter(name != "Victoria07_18_17")

babiesinfo <- babies %>%
  select(name, lang, age, gender) %>%
  distinct() %>%
  group_by(lang) %>%
  summarise(N = n(),
            age_mean = mean(age),
            sd = sd(age),
            min = min(age),
            max = max(age))

genders <- babies %>%
  select(name, lang, age, gender) %>%
  distinct() %>%
  group_by(lang, gender) %>%
  summarise(N = n()) %>%
  spread(gender, N)

babiesinfo <- left_join(babiesinfo, genders) %>%
  select(lang, N, Female, Male, age_mean, sd, min, max) %>%
  print()

babies$agegroup <- fct_relevel(babies$agegroup, c("younger","older"))


# IF we do age groups, use this code
# 
# babiesinfo <- babies %>%
#   select(name, lang, age, agegroup, gender) %>%
#   distinct() %>%
#   group_by(lang, agegroup) %>%
#   summarise(N = n(),
#             age_mean = mean(age),
#             sd = sd(age),
#             min = min(age),
#             max = max(age))
# 
# genders <- babies %>%
#   select(name, lang, age, agegroup, gender) %>%
#   distinct() %>%
#   group_by(lang, agegroup, gender) %>%
#   summarise(N = n()) %>%
#   spread(gender, N)
# 
# babiesinfo <- left_join(babiesinfo, genders) %>%
#   select(lang, agegroup, N, Female, Male, age_mean, sd, min, max) %>%
#   print()

```

# Global Looking

We calculated percentages *based on overall clip length* as the denominator. In this way, we can meaningfully contrast looking times at the videos (which are variable lengths) based on different factors. But when we go to AOI analysis we need to re-calculate the percentages so the denominator is based on total looking time, not overall clip length. 

The chart below shows little differences based on factors Age, Language, or Direction. That's good, means the videos were equally engaging for all babies, right? 

```{r}
babies$lang <- as.factor(babies$lang)
babies_overall_looking <- babies %>%
  group_by(name, age, lang, direction, story, repetition) %>%
  summarise(percent = sum(percent)) # gets total looking percent for each trial for each baby

# Table of means
babies_overall_looking %>% 
  group_by(name, lang, direction) %>%
  summarise(percent = mean(percent)) %>% # get average looking percent for each baby
  group_by(lang, direction) %>%
  summarise(mean_percent = mean(percent),
            count = n(),
            sd = sd(percent),
            se = sd/sqrt(count)) %>%
  select(-sd) %>%
  print()

ggplot(babies_overall_looking, aes(x = age, y = percent, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ lang) +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("Video Attention") +
  xlab("age (months)") +
  ylab("percent looking") + 
  theme_bw() + 
  scale_y_continuous(limits = c(0,1), labels = percent) 


# Plot
# babies_overall_looking %>% 
#   group_by(lang, direction, name) %>%
#   summarise(percent = mean(percent)) %>% # gets average looking percent for each baby
#   group_by(lang, direction) %>%
#   summarise(mean_percent = mean(percent), # gets group averages
#             count = n(),
#             sd = sd(percent),
#             se = sd/sqrt(count)) %>% 
#   ggplot(aes(x = lang, y = mean_percent, fill = direction)) + 
#   geom_col(position = "dodge") + 
#   geom_errorbar(aes(ymin = mean_percent - se, ymax = mean_percent + se), 
#                 position = position_dodge(width = 0.9), width = 0.25) + 
#   scale_y_continuous(limits = c(0,1), labels = percent) +
#   theme_minimal() + 
#   theme(panel.grid.major.x = element_blank()) +
# #  facet_wrap("lang") +
#   ggtitle("Video Attention") +
#   xlab("") +
#   ylab("percent looking")

# babies_overall_looking %>%
#   ggplot(aes(x = lang, y = percent, fill = direction)) +
#   facet_wrap("agegroup") + 
#   geom_violin()
```

A linear model shows there were no significant effects of any factors. We can conclude there was no effect on how *much* the babies looked at the stimuli.  

```{r}
global_lm <- lmer(percent ~ age + lang * direction + (direction|name) + (direction|story), data = babies_overall_looking)
summary(global_lm) 
#ggcoef(global_lm)
```

# AOI Looking
Now we'll re-calculate the percentages so the denominator is based on total looking time. All AOIs should sum up to 100% for each trial and each baby. Next let's make a boxplot of all AOIs. 

```{r}
#Recalculate percent
babies <- babies %>%
  select(-percent) %>%
  group_by(name, lang, agegroup, age, direction, story, mark, trial, repetition, gender) %>%
  mutate(totalsec = sum(secs)) %>%
  group_by(name, lang, agegroup, age, direction, story, mark, trial, repetition, gender, aoi) %>%
  mutate(percent = secs/totalsec)

# Boxplot
babies %>%
  ggplot(aes(x = aoi, y = percent, fill = direction)) + 
  geom_boxplot() +
  ggtitle("AOI Attention") +
  theme_bw() + 
  xlab("") +
  theme(axis.text.x = element_text(angle=45, hjust = 1),
        panel.grid.major.x = element_blank()) +
  scale_y_continuous(labels = scales::percent, limits = c(0,1))
```
It appears two important AOIs are MidChestTop and MidFaceBottom. Let's look again only at midline AOIs:

```{r}
midline = c("Belly","BelowChest","MidChestBottom","MidChestCenter","MidChestTop",
            "MidFaceBottom","MidFaceCenter","MidFaceTop")
babies %>%
  filter(aoi %in% midline) %>%
  ggplot(aes(x = aoi, y = percent, fill = direction)) + 
  geom_boxplot() +
  ggtitle("Midline AOI Attention") +
  theme_bw() + 
  xlab("") +
  theme(axis.text.x = element_text(angle=45, hjust = 1),
        panel.grid.major.x = element_blank()) +
  scale_y_continuous(labels = scales::percent, limits = c(0,1))
```

I'm going to run linear models with only MidChestTop or MidFaceBottom, and see what happens. No age interactions.

**MidChestTop:**

* Significant effect of age (p = 0.004) - older babies look at MidChestTop AOI more.
* Significant effect of language (p = 0.006) - NSE babies look +14.6% more than SE babies. 
* Significant effect of direction (p = 0.046) - Babies look about 4.5% less for reversed
* No language X direction interaction. 

**MidFaceBottom:** 

* No effect of age
* Significant effect of language (p = 0.004) - SE babies look +21% more than NSE babies.
* Marginal effect of language (p = 0.073) - Babies look about 2% more for reversed. 
* No language X direction interaction.

```{r}
babies %>%
  filter(aoi %in% c("MidFaceBottom","MidChestTop")) %>%
  ggplot(aes(x = age, y = percent, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) + 
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_continuous(limits = c(0,1), labels = percent) +
  theme_bw() + 
#  theme(panel.grid.major.x = element_blank()) +
  facet_grid(aoi ~ lang) +
  ggtitle("AOI Attention") +
  xlab("") +
  ylab("percent looking")

midchesttop_lm <- lmer(percent ~ age + lang * direction + (1|name) + (1|story), data = filter(babies, aoi == "MidChestTop"))
summary(midchesttop_lm)
#ggcoef(midchesttop_lm)

midfacebottom_lm <- lmer(percent ~ age + lang * direction + (1|name) + (1|story), data = filter(babies, aoi == "MidFaceBottom"))
summary(midfacebottom_lm)
#ggcoef(midfacebottom_lm)

# Bar chart
# babies %>%
#   filter(aoi %in% c("MidFaceBottom","MidChestTop")) %>%
#   group_by(agegroup, lang, direction, name, aoi) %>%
#   summarise(percent = mean(percent)) %>% # gets average looking percent for each baby
#   group_by(agegroup, lang, direction, aoi) %>%
#   summarise(mean_percent = mean(percent), # gets group averages
#             count = n(),
#             sd = sd(percent),
#             se = sd/sqrt(count)) %>% 
#   ggplot(aes(x = lang, y = mean_percent, fill = direction)) + 
#   geom_col(position = "dodge") + 
#   geom_errorbar(aes(ymin = mean_percent - se, ymax = mean_percent + se), 
#                 position = position_dodge(width = 0.9), width = 0.25) + 
#   scale_y_continuous(limits = c(0,1), labels = percent) +
#   theme_minimal() + 
#   theme(panel.grid.major.x = element_blank()) +
#   facet_grid(aoi ~ agegroup) +
#   ggtitle("Video Attention") +
#   xlab("") +
#   ylab("percent looking")
```


# Face-Chest Ratio
Next, we'll define a Face-Chest Ratio (FCR) such that:

1. MidFaceCenter, MidFaceBottom = Face
1. MidChestTop, MidChestCenter, MidChestBottom, BelowChest = Chest
1. FCR = face - chest / face + chest

We did not include Belly or MidFaceTop because of very low looking rates according to the boxplots above.

```{r}
babies_fcr <- babies %>%
  spread(aoi,percent) %>%
  group_by(name, age, agegroup, lang, gender, direction, story, repetition) %>%
  summarise(face = sum(MidFaceCenter, MidFaceBottom, na.rm = TRUE),
         chest = sum(MidChestTop, MidChestCenter, MidChestBottom, BelowChest, na.rm = TRUE),
         fcr = (face - chest) / (face + chest))

# Table of means
babies_fcr %>% 
  group_by(lang, direction, name) %>%
  summarise(fcr = mean(fcr)) %>% # gets average looking percent for each baby
  group_by(lang, direction) %>%
  summarise(mean_fcr = mean(fcr), # gets group averages
            count = n(),
            sd = sd(fcr),
            se = sd/sqrt(count)) %>%
  select(-sd) %>%
  print()

# Plot
ggplot(babies_fcr, aes(x = age, y = fcr, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) + 
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_continuous(limits = c(-1,1)) +
  theme_bw() + 
  theme(panel.grid.major.x = element_blank()) +
  facet_grid(. ~ lang) +
  ggtitle("Face-Chest Ratios") +
  xlab("") +
  ylab("FCR")

# Bar chart
# babies_fcr %>% 
#   group_by(agegroup, lang, direction, name) %>%
#   summarise(fcr = mean(fcr)) %>% # gets average looking percent for each baby
#   group_by(agegroup, lang, direction) %>%
#   summarise(mean_fcr = mean(fcr), # gets group averages
#             count = n(),
#             sd = sd(fcr),
#             se = sd/sqrt(count)) %>% 
#   ggplot(aes(x = lang, y = mean_fcr, fill = direction)) + 
#   geom_col(position = "dodge") + 
#   geom_errorbar(aes(ymin = mean_fcr - se, ymax = mean_fcr + se), 
#                 position = position_dodge(width = 0.9), width = 0.25) + 
#   scale_y_continuous(limits = c(-1,1)) +
#   theme_minimal() + 
#   theme(panel.grid.major.x = element_blank()) +
#   facet_wrap("agegroup") +
#   ggtitle("Face-Chest Ratios") +
#   xlab("") +
#   ylab("FCR")
```

What will a linear mixed model tell us? (with no age interactions)

* No effect of age. Interesting. Maybe because we don't have that many babies. 
* Strong effect of language: SE babies have overall +0.50 FCR than NSE babies (p = 0.008); in other words, they are more attracted to the face. 
* Effect of direction: Reversal *increases* FCR by +0.12 across all babies (p = 0.032). Interesting.
* Weak language x direction interaction. It seems NSE babies have a bigger reversal effect than SE babies? Trying to judge from the heat maps. (p = 0.095). BUt it's so weak so possibly not worth mentioning.


```{r}
fcr_lm <- lmer(fcr ~ age + lang * direction + (1|name) + (1|story), data = babies_fcr)
summary(fcr_lm)
#ggcoef(fcr_lm)
```

# Heat Maps
And now heat maps!

```{r}
heatmap_babies <- babies %>%
  filter(aoi %in% midline) %>%
  ungroup() %>%
  group_by(lang, name, direction, aoi) %>%
  summarise(percent = mean(percent, na.rm=TRUE)) %>%
  group_by(lang, direction, aoi) %>%
  summarise(percent = mean(percent, na.rm=TRUE)) %>%
  ungroup() %>%
  mutate(aoi = factor(aoi, levels = c("Belly","BelowChest","MidChestBottom","MidChestCenter","MidChestTop",
            "MidFaceBottom","MidFaceCenter","MidFaceTop")))

ggplot(heatmap_babies, aes(x = lang, y = aoi)) +
  geom_tile(aes(fill=percent),color="lightgray",na.rm=TRUE) + 
#  scale_fill_viridis(option = "viridis", direction=-1, limits = c(0,.7), labels = percent, name = "looking time") +
    scale_fill_gradient(low = "#ffffff", high = "#08519c", space = "Lab", limits = c(0,.451), labels = percent, name = "looking time", na.value = "grey50") +
  theme_bw() +
  theme(strip.text.x = element_text(size = 11, color = "black", face = "italic"), 
        strip.background = element_rect(colour = "white", fill = "white"),
        panel.grid.major = element_line(color = "white")) +
  facet_grid(. ~ direction) +
  ylab("") + xlab("") + ggtitle("Eye Gaze Heat Map, by Direction") + 
  scale_y_discrete(expand=c(0,0)) +
  scale_x_discrete(expand = c(0,0))

ggplot(heatmap_babies, aes(x = direction, y = aoi)) +
  geom_tile(aes(fill=percent),color="lightgray",na.rm=TRUE) + 
#  scale_fill_viridis(option = "viridis", direction=-1, limits = c(0,.7), labels = percent, name = "looking time") +
    scale_fill_gradient(low = "#ffffff", high = "#08519c", space = "Lab", limits = c(0,.451), labels = percent, name = "looking time", na.value = "grey50") +
  theme_bw() +
  theme(strip.text.x = element_text(size = 11, color = "black", face = "italic"), 
        strip.background = element_rect(colour = "white", fill = "white"),
        panel.grid.major = element_line(color = "white")) +
  facet_grid(. ~ lang) +
  ylab("") + xlab("") + ggtitle("Eye Gaze Heat Map, by Language") + 
  scale_y_discrete(expand=c(0,0)) +
  scale_x_discrete(expand = c(0,0))
```

# Different Ways to Visualize Reversal
I want to try to visualize reversal effects a different way. Maybe this. 

```{r}
# Get participant-level data
babies_fcr2 <- babies_fcr %>%
  group_by(name, age, lang, direction) %>%
  summarise(fcr = mean(fcr))

# reversal_effect_lm <- lmer(fcr ~ age + lang * direction + (1|name), data = babies_fcr2)
# summary(reversal_effect_lm)

ggplot(babies_fcr2, aes(x = direction, y = fcr, color = lang, fill = lang)) +
  geom_point() +
  geom_line(aes(group = name)) +
  facet_grid(. ~ lang) + 
  scale_y_continuous(limits = c(-1,1)) +
  theme_bw()

```

Or a reversal effect chart? Okay, so this chart tells us overall there really wasn't much of a reversal effect for SE babies, they're all hovering around 0. Interesting. While there seems to be a reversal effect for NSE babies where they look at the face more during reversed stories! 

```{r}
# Get participant-level data
babies_fcr3 <- babies_fcr2 %>%
  spread(direction, fcr) %>%
  group_by(name, age, lang) %>%
  summarise(diff = forward - reversed)

ggplot(babies_fcr3, aes(x = age, y = diff, color = lang)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  scale_y_continuous(limits = c(-1,1)) +
  theme_bw() +
  ggtitle("Reversal Effect") +
  ylab("Forward FCR - Reversed FCR")
```

This analysis of within-subject variation here: 

```{r}
# First get the mean of each trial, THEN the participant-level means
within_subjects <- babies_fcr %>%
  group_by(name, lang, direction, story, repetition) %>%
  summarise(fcr = mean(fcr, na.rm = TRUE),
            count = n()) %>%
  group_by(name, lang, direction) %>%
  summarise(mean = mean(fcr, na.rm = TRUE),
            se = sd(fcr, na.rm = TRUE)/sqrt(n()),
            count = n())
# Then spread out mean and SE columns by direction
within_subjects_means <- within_subjects %>%
  select(-se, -count) %>%
  spread(direction, mean, sep = "_")
within_subjects_se <- within_subjects %>%
  select(-mean, -count) %>%
  spread(direction, se, sep = "SE")
within_subjects <- left_join(within_subjects_means, within_subjects_se, by = c("name","lang"))

# Now let's plot
lims <- c(-1,1)
within_subjects %>%
  ggplot(aes(x = direction_forward, y = direction_reversed, color = lang)) +
  geom_abline() +
  geom_point(size = 2) + 
  geom_errorbar(aes(ymin=direction_reversed-directionSEreversed, ymax=direction_reversed+directionSEreversed)) +
  geom_errorbarh(aes(xmin=direction_forward-directionSEforward, xmax=direction_forward+directionSEforward)) +
  theme_bw() +
  theme(aspect.ratio = 1) +
  scale_x_continuous("forward", limits = c(-1,1)) +
  scale_y_continuous("reversed", limits = c(-1,1)) +
  ggtitle("FCR Means") +
  facet_wrap("lang")
```

And a classic box/error plot with age collapsed. 

```{r}
babies_fcr2 %>%
  group_by(lang, direction) %>%
  summarise(fcr_mean = mean(fcr),
            sd = sd(fcr),
            n = n(),
            se = sd/sqrt(n)) %>%
  ggplot(aes(x = lang, y = fcr_mean, fill = direction)) +
  geom_bar(stat = "identity", position = position_dodge()) + 
  geom_errorbar(aes(ymin = fcr_mean-se, ymax = fcr_mean+se), position = position_dodge(0.9), width = 0.2) +
  scale_y_continuous(limits = c(-0.5, 0.5)) +
  theme_linedraw()

```

# Discussion re AOI/anatomical data
The biggest change is we lost the interaction between language and direction. For the ICSLA abstract we reported a strong interaction (p = 0.01), but now it's p = 0.10. Shoot. 

The interpretation here is that:

* All babies looked equally at all videos regardless of language or direction. Good!
* SE babies are stronger face-lookers than NSE babies. (Same as ICSLA)
* Reversal appears to affect NSE babies more than SE babies? (Marginal). This is unlike ICSLA. 

```{r eval=FALSE, fig.height=12, fig.width=12, include=FALSE}
# Correlations

# Let's try correlations
babies_nse <- babies %>% 
  filter(aoi %in% midline) %>%
  filter(lang == "NSE") %>%
  group_by(name, direction, aoi) %>% 
  summarise(percent = mean(percent)) %>%
  ungroup() %>%
  mutate(direction = case_when(
    direction == "forward" ~ "fw",
    direction == "reversed" ~ "rv"
  )) %>% 
  unite(aoi2, direction, aoi, sep = "_") %>%
  spread(aoi2, percent) %>%
  select(-name)

babies_se <- babies %>% 
  filter(aoi %in% midline) %>%
  filter(lang == "SE") %>%
  group_by(name, direction, aoi) %>% 
  summarise(percent = mean(percent)) %>%
  ungroup() %>%
  mutate(direction = case_when(
    direction == "forward" ~ "fw",
    direction == "reversed" ~ "rv"
  )) %>% 
  unite(aoi2, direction, aoi, sep = "_") %>%
  spread(aoi2, percent) %>%
  select(-name)

ggcorr(babies_nse, label = TRUE, label_size = 5, label_round = 2, label_alpha = TRUE, hjust = 0.9, size = 5, color = "grey50", layout.exp = 1) + ggtitle("NSE")

ggcorr(babies_se, label = TRUE, label_size = 5, label_round = 2, label_alpha = TRUE, hjust = 0.9, size = 5, color = "grey50", layout.exp = 1) + ggtitle("SE")

library(corrr)
babies_nse %>% correlate() %>% network_plot(min_cor=0.6) + ggtitle("NSE Babies")
babies_se %>% correlate() %>% network_plot(min_cor=0.6) + ggtitle("SE Babies")
```

# XY Space Data
We'll load the data from the `childxydata.feather` file made in 06rawxydata.Rmd. So any new babies, please run the first code block in 06 to include it. Then we'll keep all the babies we also have in the AOI data group. 

```{r message=FALSE, warning=FALSE}
included <- babies %>%
  ungroup() %>%
  select(name) %>% 
  distinct() %>%
  unlist()

xydata <- read_feather("../Child Data/childxydata.feather") %>%
  rename(name = participant) %>%
  filter(name %in% included)

# Get ages
ages <- read_csv("childrenages.csv") %>%
  rename(name = participant)
xydata <- xydata %>% left_join(ages, by = "name") %>%
  mutate(age = age*12) %>%
  mutate(agegroup = case_when(
    age <= 8.99 ~ "younger",
    age >= 9.0 & age < 15 ~ "older"
  )) %>%
  mutate(language = case_when(
    language == "EnglishExposed" ~ "NSE",
    language == "SignLanguageExposed" ~ "SE"
  )) %>%
  rename(lang = language) %>%
  select(name, group, gender, lang, condition, mark, trial, repetition, x, y, age, agegroup) %>%
  separate(condition, into = c("story", "clip", "direction")) %>%
  unite("story", c("story", "clip")) %>%
  mutate(direction = case_when(
    direction == "ER" ~ "reversed",
    direction == "FW" ~ "forward"
  )) %>%
  mutate(name = factor(name),
         group = factor(group),
         gender = factor(gender),
         lang = factor(lang),
         story = factor(story),
         direction = factor(direction),
         mark = factor(mark),
         trial = factor(trial),
         repetition = factor(repetition),
         agegroup = factor(agegroup))
```

## Overall Looking
Let's check that we have no significant group or condition differences in terms of valid (not empty) data points collected. This is same as "Global Looking" we have above, really, but w raw xy data. 

```{r}
xy_overall <- xydata %>%
  filter(!is.na(x)) %>%
  group_by(name, age, lang, direction, story, repetition) %>%
  summarise(data_points = n()) # gets total looking percent for each trial for each baby

# Table of means
xy_overall %>% 
  group_by(name, lang, direction) %>%
  summarise(data_points = mean(data_points)) %>% # get average looking percent for each baby
  group_by(lang, direction) %>%
  summarise(mean_data_points = mean(data_points),
            count = n(),
            sd = sd(data_points),
            se = sd/sqrt(count)) %>%
  select(-sd) %>%
  print()

ggplot(xy_overall, aes(x = age, y = data_points, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ lang) +
  geom_smooth(method = "lm", se = FALSE) +
  ggtitle("Data Points") +
  xlab("age (months)") +
  ylab("data points recorded") + 
  theme_bw() 
```


A linear model shows there were no significant effects of any factors. We can conclude there was no effect on how *much* data was collected from the babies.   

```{r}
overall_xy_lm <- lmer(data_points ~ age + lang * direction + (direction|name) + (direction|story), data = xy_overall)
summary(overall_xy_lm) 
#ggcoef(overall_xy_lm)
```

## XY Data LMMs
Now we're going to run LMMs on babies' raw: 

* horizontal spread (middle 50% of x data; xIQR)
* vertical spread (middle 50% of y data; yIQR)
* viewing area (A = middle-x * middle-y; area)

But to do this we first trim each baby's data, getting rid of the first 60 samples (0.5 secs) of each trial. 

```{r}
xydata <- xydata %>%
  group_by(name,trial) %>%
  slice(60:n())

iqr <- xydata %>%
  group_by(name, age, lang, story, direction, trial) %>%
  summarise(xIQR = IQR(x,na.rm=TRUE),
                   yIQR = IQR(y,na.rm=TRUE),
                   xmed = median(x, na.rm=TRUE),
                   ymed = median(y, na.rm=TRUE),
                   area = xIQR*yIQR)
head(iqr,20)

```


### Middle X

Results tell us that there is a significant effect of age where horizontal spread grew narrower with older babies (p = 0.024; slope = -2.2 px/month). There was a marginal effect of language where SE babies have slightly narrower horizontal spread (11 px; p = 0.088). No effects of reversal or interactions.

```{r}
xiqr_mean <- iqr %>% 
  group_by(lang, direction, name) %>%
  summarise(xIQR = mean(xIQR, na.rm = T)) %>% # gets average looking percent for each baby
  group_by(lang, direction) %>%
  summarise(mean_xIQR = mean(xIQR), # gets group averages
            count = n(),
            sd = sd(xIQR),
            se = sd/sqrt(count)) %>%
  select(-sd) %>%
  print()

# Plot
ggplot(iqr, aes(x = age, y = xIQR, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) + 
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw() + 
  theme(panel.grid.major.x = element_blank()) +
  facet_grid(. ~ lang) +
  ggtitle("Horizontal Spread") +
  xlab("") +
  ylab("xIQR")

ggplot(xiqr_mean, aes(x = lang, y = mean_xIQR, fill = direction)) +
  geom_bar(stat = "identity", position = position_dodge()) + 
  geom_errorbar(aes(ymin = mean_xIQR-se, ymax = mean_xIQR+se), position = position_dodge(0.9), width = 0.2) +
  theme_linedraw()

xiqr_lm <- lmer(xIQR ~ age + lang * direction + (1|name) + (1|story), data = iqr)
summary(xiqr_lm)
#ggcoef(xiqr_lm)
```


### Middle Y

We had a significant effect of age where vertical spread got narrower with older babies (p = 0.019, slope = -5.5 px/month). There were no effect of language, direction, or interactions. 

```{r}
yiqr_mean <- iqr %>% 
  group_by(lang, direction, name) %>%
  summarise(yIQR = mean(yIQR, na.rm = T)) %>% # gets average looking percent for each baby
  group_by(lang, direction) %>%
  summarise(mean_yIQR = mean(yIQR), # gets group averages
            count = n(),
            sd = sd(yIQR),
            se = sd/sqrt(count)) %>%
  select(-sd) %>%
  print()

# Plot
ggplot(iqr, aes(x = age, y = yIQR, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) + 
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw() + 
  theme(panel.grid.major.x = element_blank()) +
  facet_grid(. ~ lang) +
  ggtitle("Vertical Spread") +
  xlab("") +
  ylab("yIQR")

ggplot(yiqr_mean, aes(x = lang, y = mean_yIQR, fill = direction)) +
  geom_bar(stat = "identity", position = position_dodge()) + 
  geom_errorbar(aes(ymin = mean_yIQR-se, ymax = mean_yIQR+se), position = position_dodge(0.9), width = 0.2) +
  theme_linedraw()

yiqr_lm <- lmer(yIQR ~ age + lang * direction + (1|name) + (1|story), data = iqr)
summary(yiqr_lm)
#ggcoef(yiqr_lm)
```

### Viewing Area
Again, age is significant (p = 0.01) such that area gets smaller with age (about 500 sq. px. per month). No effect of language or direction or interactions. 

```{r}
area_mean <- iqr %>% 
  group_by(lang, direction, name) %>%
  summarise(area = mean(area, na.rm = T)) %>% # gets average looking percent for each baby
  group_by(lang, direction) %>%
  summarise(area_mean = mean(area), # gets group averages
            count = n(),
            sd = sd(area),
            se = sd/sqrt(count)) %>%
  select(-sd) %>%
  print()

# Plot
ggplot(iqr, aes(x = age, y = area, color = direction, fill = direction)) + 
  geom_jitter(alpha = 0.5) + 
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw() + 
  theme(panel.grid.major.x = element_blank()) +
  facet_grid(. ~ lang) +
  ggtitle("Viewing Area") +
  xlab("") +
  ylab("Area (px^2)")

ggplot(area_mean, aes(x = lang, y = area_mean, fill = direction)) +
  geom_bar(stat = "identity", position = position_dodge()) + 
  geom_errorbar(aes(ymin = area_mean-se, ymax = area_mean+se), position = position_dodge(0.9), width = 0.2) +
  theme_linedraw()

area_lm <- lmer(area ~ age + lang * direction + (1|name) + (1|story), data = iqr)
summary(area_lm)
#ggcoef(area_lm)
```

## Plotting Viewing Area 

```{r}
medians <- iqr %>%
  group_by(name,lang,direction) %>%
  summarise(xIQR = median(xIQR,na.rm=TRUE),
                   yIQR = median(yIQR,na.rm=TRUE),
                   xmed = median(xmed,na.rm=TRUE),
                   ymed = median(ymed,na.rm=TRUE)) %>%
  group_by(lang,direction) %>% 
  summarise(xIQR = mean(xIQR,na.rm=TRUE),
                   yIQR = mean(yIQR,na.rm=TRUE),
                   x = median(xmed,na.rm=TRUE),
                   y = median(ymed,na.rm=TRUE)) %>%
  mutate(y = y*-1,
         xmin = x-(xIQR/2),
         xmax = x+(xIQR/2),
         ymin = y-(yIQR/2),
         ymax = y+(yIQR/2))
img <- png::readPNG("cindy.png")
g <- grid::rasterGrob(img, interpolate=TRUE, width=unit(1,"npc"), height=unit(1,"npc")) 
ggplot(medians, aes(fill=direction,color=direction)) +
  annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
  geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) + 
  theme_linedraw() +
  scale_x_continuous(limits = c(0,1080), expand = c(0, 0)) +
  scale_y_continuous(limits = c(-720,0), expand = c(0, 0)) +
  facet_wrap("lang")
```

# Puppies
Get puppy data!

```{r message=FALSE, warning=FALSE}
# puppies1 <- read_tsv("../Child Data/_puppies/all puppies group 1 sums.txt", na = "-") %>% 
#   clean_names() %>%
#   select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
#   rename(name = x1) %>%
#   mutate(mean = rowMeans(.[2:10], na.rm = T)) %>%
#   select(name, mean)
#   
# puppies2 <- read_tsv("../Child Data/_puppies/all puppies group 2 sums.txt", na = "-") %>% 
#   clean_names() %>%
#   select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
#   rename(name = x1) %>%
#   mutate(mean = rowMeans(.[2:17], na.rm = T)) %>%
#   select(name, mean)
# 
# puppies <- rbind(puppies1, puppies2)
# 
# participants <- xydata %>% 
#   select(name, group, gender, lang) %>% 
#   distinct()
# 
# puppies <- left_join(participants, puppies, by = "name")
# 
# summary(lm(data = puppies, mean ~ lang))
# 
# puppies_se <- filter(puppies, lang == "NSE")
# puppies_nse <- filter(puppies, lang == "SE")
# 
# t.test(puppies_se$mean, puppies_nse$mean)

# Let's do this again but preserve puppy-level data
puppies1 <- read_tsv("../Child Data/_puppies/all puppies group 1 sums.txt", na = "-") %>% 
  clean_names() %>%
  select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
  rename(name = x1) %>%
  gather(key = puppy, value = sec, -name) %>%
  mutate(pups = case_when(
    str_detect(puppy, "huskies") ~ "huskies",
    str_detect(puppy, "golden") ~ "golden",
    str_detect(puppy, "wawa") ~ "wawa",
    str_detect(puppy, "frisby") ~ "frisby",
    str_detect(puppy, "bulldog") ~ "bulldog",
    str_detect(puppy, "puppy_jpg") ~ "puppy",
    TRUE ~ puppy
  ))

puppies2 <- read_tsv("../Child Data/_puppies/all puppies group 2 sums.txt", na = "-") %>% 
  clean_names() %>%
  select(-c(age, analysis, check_if_want_scatterplot, gender, language)) %>%
  rename(name = x1) %>%
  gather(key = puppy, value = sec, -name) %>%
  mutate(pups = case_when(
    str_detect(puppy, "huskies") ~ "huskies",
    str_detect(puppy, "golden") ~ "golden",
    str_detect(puppy, "wawa") ~ "wawa",
    str_detect(puppy, "frisby") ~ "frisby",
    str_detect(puppy, "bulldog") ~ "bulldog",
    str_detect(puppy, "puppies") ~ "puppies",
    TRUE ~ puppy
  ))

puppies <- rbind(puppies1,puppies2)

participants <- xydata %>%
  select(name, group, gender, lang) %>%
  distinct()

puppies <- left_join(participants, puppies, by = "name")

summary(lmer(data = puppies, sec ~ lang + (1|name) + (1|pups)))

ggplot(puppies, aes(x = lang, y = sec, fill = lang)) + geom_boxplot()
```

